R Markdown

Load/install the following packages:

library("tidyverse")
library("ggridges")
library("rcartocolor")
library("glue")
library("knitr")

# Make theme_minimal() the default theme for every plot drawn after this call.
theme_set(theme_minimal())

Read in the data

# Read the earnings data with the column types declared up front, so parsing
# does not depend on readr's type guessing and no column-spec message is
# printed into the knitted document.
# NOTE(review): types follow the preview table (School/Gender are text,
# Gap/Pay are numeric) -- confirm against the full CSV.
earnings <- read_csv(
  "data/school-earnings.csv",
  col_types = cols(
    School = col_character(),
    Gap = col_double(),
    Gender = col_character(),
    Pay = col_double()
  )
)

# Preview the first ten rows of the data as a formatted table.
earnings %>%
  head(n = 10) %>%
  kable()
School Gap Gender Pay
Berkeley 17 Men 88
Berkeley 17 Women 71
Brown 20 Men 92
Brown 20 Women 72
Chicago 40 Men 118
Chicago 40 Women 78
Columbia 33 Men 119
Columbia 33 Women 86
Cornell 27 Men 107
Cornell 27 Women 80

Final Product




How do we get there?

Step 1: Basic aesthetic mappings

# Step 1: map salary to the x axis and school to the y axis.
# No geom is added yet, so only the empty panel and axes are drawn.
earnings %>%
  ggplot(mapping = aes(x = Pay, y = School))

Step 2: Line segments

# Step 2: join the observations of each school with a gray segment.
# With a discrete y, ggplot2 groups rows by school, so every school gets its
# own line between its data points.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = School)) +
  geom_line(linewidth = 1.5, color = "gray70")

Step 3: Reordering schools

# Step 3: order the schools on the y axis by their lowest Pay value
# (fct_reorder() summarises Pay per school with "min").
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70")

Step 4: Colored points for women and men

# Step 4: add one point per observation, colored by Gender.
# The color mapping lives on the point layer only, so the gray segments are
# still grouped by school rather than split by gender.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(aes(color = Gender), size = 4)

Step 5: Larger white points underneath colored points add visual gaps between segments and colored points

# Step 5: draw a larger white point beneath each colored point.
# Layers are drawn in order, so the white point (size 6) covers the end of the
# segment and leaves a visible gap around the colored point (size 4).
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4)

Step 6a: Basic text labels

# Step 6a: label every observation with its Gap value.
# The label layer inherits x and y from the plot, so each label sits on top of
# its point; later steps move and restrict the labels.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(aes(label = Gap), color = "gray70")

Step 6b: Only show labels next to the points for men, and shift them to the right (add 2 to our x mapping)

# Step 6b: keep one label per school by giving the label layer only the rows
# for men, and move it right of the point by adding 2 to the x mapping.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2, label = Gap),
             color = "gray70")

Step 6c: Remove label border (label.size = NA) and left-align text using hjust (hjust = 0 for left justified, hjust = 0.5 for centered, hjust = 1 for right justified)

# Step 6c: tidy the labels -- drop the border (label.size = NA) and
# left-justify the text (hjust = 0) so it starts at the shifted x position.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2, label = Gap),
             color = "gray70",
             hjust = 0,
             label.size = NA)

Step 6d: Use the glue package to create the final text annotation

# Step 6d: build the full annotation text with glue(), which substitutes each
# row's Gap into the sentence; the leading "$" is a literal dollar sign.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2,
                 label = glue("Men earn ${Gap}k more than women")),
             color = "gray70",
             hjust = 0,
             label.size = NA)

Step 7: Expand x axis to include the right-most text using expand_limits()

# Step 7: stretch the x axis so it includes 200, making room for the
# left-justified labels that extend to the right of the points.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2,
                 label = glue("Men earn ${Gap}k more than women")),
             color = "gray70",
             hjust = 0,
             label.size = NA) +
  expand_limits(x = 200)

Step 8: Nicer color scale using the rcartocolor package (see carto palettes here)

# Step 8: color the points with the "Bold" palette from rcartocolor;
# name = NULL drops the legend title.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2,
                 label = glue("Men earn ${Gap}k more than women")),
             color = "gray70",
             hjust = 0,
             label.size = NA) +
  expand_limits(x = 200) +
  scale_color_carto_d(palette = "Bold", name = NULL)

Step 9: Set axis labels

# Step 9: give the x axis a readable title and remove the y axis title
# (y = NULL), since the school names speak for themselves.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2,
                 label = glue("Men earn ${Gap}k more than women")),
             color = "gray70",
             hjust = 0,
             label.size = NA) +
  expand_limits(x = 200) +
  scale_color_carto_d(palette = "Bold", name = NULL) +
  labs(x = "Median Mid-career Salary", y = NULL)

Step 10: Formatted tick mark labels on x-axis

# Step 10: place x-axis ticks at 100, 150 and 200 and print them as dollar
# amounts in thousands. `breaks` and `labels` are matched by position, so the
# two vectors must stay the same length and order.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2,
                 label = glue("Men earn ${Gap}k more than women")),
             color = "gray70",
             hjust = 0,
             label.size = NA) +
  expand_limits(x = 200) +
  scale_color_carto_d(palette = "Bold", name = NULL) +
  labs(x = "Median Mid-career Salary", y = NULL) +
  scale_x_continuous(breaks = c(100, 150, 200),
                     labels = c("$100k", "$150k", "$200k"))

Step 11: Nicer theme (provided by ggridges package)

# Step 11 (final plot): same layers and scales as before, finished with
# theme_ridges() from ggridges, which overrides the session default theme for
# this plot only.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(earnings, aes(x = Pay, y = fct_reorder(School, Pay, .fun = "min"))) +
  geom_line(linewidth = 1.5, color = "gray70") +
  geom_point(size = 6, color = "white") +
  geom_point(aes(color = Gender), size = 4) +
  geom_label(data = filter(earnings, Gender == "Men"),
             aes(x = Pay + 2,
                 label = glue("Men earn ${Gap}k more than women")),
             color = "gray70",
             hjust = 0,
             label.size = NA) +
  expand_limits(x = 200) +
  scale_color_carto_d(palette = "Bold", name = NULL) +
  labs(x = "Median Mid-career Salary", y = NULL) +
  scale_x_continuous(breaks = c(100, 150, 200),
                     labels = c("$100k", "$150k", "$200k")) +
  theme_ridges()